import pymongo
from selenium import webdriver
import time

def _field_value(text, label):
    """Return the remainder of the line that follows *label* in *text*.

    Raises:
        ValueError: if *label* does not occur in *text*.
    """
    _, found, rest = text.partition(label)
    if not found:
        raise ValueError('label %r not found' % label)
    return rest.split('\n')[0]


def _split_slash_list(value):
    """Split a '/'-separated string into a list of whitespace-stripped items.

    A value without any '/' yields a one-element list, so no fallback
    branch is needed for single-valued fields.
    """
    return [part.strip() for part in value.split('/')]


myclient = pymongo.MongoClient('mongodb://47.93.220.108:27017/')
mydb = myclient['movie']
# NOTE(review): `executable_path` and `find_elements_by_xpath` are Selenium 3
# style APIs; confirm the pinned Selenium version still provides them.
driver = webdriver.Chrome(executable_path='/Users/aaa/py/chromedriver')
try:
    # Remove every existing document before inserting the fresh scrape.
    mydb.rankingList.delete_many({})

    # The list is paged through the `start` query parameter: 0, 25, ..., 500.
    for start in range(0, 505, 25):
        driver.get(f'https://www.douban.com/doulist/1641439/?start={start}')
        # Fixed pause after navigation before querying the page
        # (presumably to let it finish rendering).
        time.sleep(1)

        movie_names = driver.find_elements_by_xpath("//div[@class='title']/a")
        movie_list = driver.find_elements_by_xpath("//div[@class='abstract']")
        movie_officeBoxs = driver.find_elements_by_xpath('//blockquote')

        # NOTE: zip pairs the three element lists purely by position and stops
        # at the shortest one; if a page yields different counts for the three
        # XPaths, titles and box-office values may be paired incorrectly.
        for name, item, officeBox in zip(movie_names, movie_list, movie_officeBoxs):
            result_str = item.text
            try:
                production_areas = _field_value(result_str, '制片国家/地区: ')
                movie_classes = _field_value(result_str, '类型: ')
                movie_date = _field_value(result_str, '年份: ')
                # Box office is the text after '$' with thousands separators
                # removed, e.g. "$1,234" -> 1234.0.
                movie_officeBox = float(
                    _field_value(officeBox.text, '$').replace(',', '')
                )
            except ValueError as err:
                # One malformed entry should not abort the whole run after the
                # collection has already been emptied; report it and move on.
                print('skipping entry at start=%d: %s' % (start, err))
                continue

            production_area = _split_slash_list(production_areas)
            movie_class = _split_slash_list(movie_classes)

            # One document per production area; the genre list is stored
            # whole on each of them.
            for area in production_area:
                data = {
                    'movie_name': name.text,
                    'production_area': area,
                    'movie_class': movie_class,
                    'movie_date': movie_date,
                    'movie_officeBox': movie_officeBox,
                }
                mydb.rankingList.insert_one(data)
finally:
    # Always release the browser process and the database connection,
    # even when the scrape fails part-way through.
    try:
        driver.quit()
    finally:
        myclient.close()